Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
library(RcppRoll) library(prophet)
Loading required package: Rcpp
Attaching package: 'Rcpp'
The following object is masked from 'package:rsample':
populate
Loading required package: rlang
Attaching package: 'rlang'
The following objects are masked from 'package:purrr':
%@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
flatten_raw, invoke, splice
library(corrplot)
corrplot 0.95 loaded
library(anomalize) library(tsibble)
Registered S3 method overwritten by 'tsibble':
method from
as_tibble.grouped_df dplyr
Attaching package: 'tsibble'
The following object is masked from 'package:zoo':
index
The following object is masked from 'package:lubridate':
interval
The following objects are masked from 'package:base':
intersect, setdiff, union
library(xgboost)
Attaching package: 'xgboost'
The following object is masked from 'package:dplyr':
slice
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(rsample)library(fable)
Loading required package: fabletools
Attaching package: 'fabletools'
The following object is masked from 'package:yardstick':
accuracy
The following object is masked from 'package:parsnip':
null_model
The following objects are masked from 'package:infer':
generate, hypothesize
library(dplyr)data <-read.csv("~/Downloads/dynamic_supply_chain_logistics_dataset.csv")data <- data %>%mutate(timestamp =as.POSIXct(timestamp, tz ="UTC"),date =as_date(timestamp)) %>%group_by(date) %>%summarise(# SUM for flowshistorical_demand =sum(historical_demand, na.rm =TRUE),shipping_costs =sum(shipping_costs, na.rm =TRUE),# MEAN for continuous metricsfuel_consumption_rate =mean(fuel_consumption_rate, na.rm =TRUE),eta_variation_hours =mean(eta_variation_hours, na.rm =TRUE),traffic_congestion_level =mean(traffic_congestion_level, na.rm =TRUE),warehouse_inventory_level=mean(warehouse_inventory_level, na.rm =TRUE),lead_time_days =mean(lead_time_days, na.rm =TRUE),delivery_time_deviation =mean(delivery_time_deviation, na.rm =TRUE),delay_probability =mean(delay_probability, na.rm =TRUE),loading_unloading_time =mean(loading_unloading_time, na.rm =TRUE),customs_clearance_time =mean(customs_clearance_time, na.rm =TRUE),driver_behavior_score =mean(driver_behavior_score, na.rm =TRUE),fatigue_monitoring_score =mean(fatigue_monitoring_score, na.rm =TRUE),high_risk_fraction =mean(risk_classification =="High Risk", na.rm =TRUE),order_fulfillment_status =names(sort(table(order_fulfillment_status),decreasing=TRUE))[1],cargo_condition_status =names(sort(table(cargo_condition_status),decreasing=TRUE))[1],handling_equipment_availability =names(sort(table(handling_equipment_availability),decreasing=TRUE))[1],weather_condition_severity =names(sort(table(weather_condition_severity),decreasing=TRUE))[1],.groups ="drop" ) %>%mutate(date =as_date(date)) %>%arrange(date)categorical_cols_for_factor <-c("order_fulfillment_status","cargo_condition_status","handling_equipment_availability","weather_condition_severity")# Convert them to factorsdata <- data %>%mutate(across(all_of(categorical_cols_for_factor), as.factor))data <- data %>%mutate(across(where(is.numeric), ~zoo::na.locf(.x, na.rm =FALSE)))
categorical_cols_for_factor <-c("order_fulfillment_status", "weather_condition_severity","cargo_condition_status","handling_equipment_availability")data <- data %>%mutate(across(all_of(categorical_cols_for_factor), as.factor))data <- data %>%mutate(high_risk_fraction =as.numeric(high_risk_fraction))
df_fe <- data %>%mutate(day_of_week =wday(date, label =TRUE, abbr =FALSE), week_of_year =week(date),month =month(date, label =TRUE, abbr =FALSE), is_weekend =factor(ifelse(wday(date) %in%c(1, 7), 1, 0), levels =c(0,1), labels =c("No", "Yes")),demand_lag_1 =lag(historical_demand, 1),demand_lag_7 =lag(historical_demand, 7),demand_lag_30 =lag(historical_demand, 30),demand_ma_7 =rollmean(historical_demand, 7, fill =NA, align ="right"),fuel_ma_7 =rollmean(fuel_consumption_rate, 7, fill =NA, align ="right"),eta_ma_7 =rollmean(eta_variation_hours, 7, fill =NA, align ="right"),risk_ma_7 =rollmean(high_risk_fraction, 7, fill =NA, align ="right"),log_demand =log1p(historical_demand),log_shipping =log1p(shipping_costs),fuel_consumption_rate_ma_7 =roll_meanr(fuel_consumption_rate, n =7, fill =NA),eta_variation_hours_ma_7 =roll_meanr(eta_variation_hours, n =7, fill =NA) ) %>%drop_na()
ggplot(df_fe, aes(x = date, y = historical_demand)) +geom_line(alpha =0.6) +geom_smooth(method ="loess", span =0.2, color ="blue", se =FALSE) +# Replaced smooth_veclabs(title ="Historical Demand Over Time",y ="Historical Demand") +theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
# Congestion vs ETA# Ensure traffic_congestion_level is treated as a factor if it represents discrete levelsggplot(df_fe, aes(x =factor(traffic_congestion_level), y = eta_variation_hours)) +geom_boxplot(fill ="lightblue") +labs(title ="ETA Variation by Traffic Congestion Level",x ="Traffic Congestion Level (Factor)",y ="ETA Variation (Hours)") +theme_minimal()
# Weather vs Shipping Costsggplot(df_fe, aes(x = weather_condition_severity, y = shipping_costs, fill = weather_condition_severity)) +geom_boxplot() +labs(title ="Shipping Costs by Weather Severity",x ="Weather Condition Severity",y ="Shipping Costs") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))
# Correlation MatrixM <- df_fe %>%select(where(is.numeric)) %>%# Select all numeric columns for correlationselect(-week_of_year) %>%# Remove week_of_year if you don't want it in correlation# or any other numeric columns that are IDs like date componentscor(use ="pairwise.complete.obs") # use="pairwise.complete.obs" handles NAs in correlation calccorrplot(M, method ="circle", type ="upper", order ="hclust", tl.col ="black", tl.srt =45)
df_fe %>%as_tsibble(index = date) %>%time_decompose(fuel_consumption_rate, method ="stl", frequency ="week") %>%# Changed trend = "periodic" to frequency = "week"anomalize(remainder, method ="iqr", alpha =0.05) %>%time_recompose() %>%plot_anomalies(time_recomposed =TRUE) +labs(title ="Fuel Consumption Anomalies") +theme_minimal()
Converting from tbl_ts to tbl_time.
Auto-index message: index = date
frequency = 7 days
trend = 91 days
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
# ETA anomaliesdf_fe %>%as_tsibble(index = date) %>%time_decompose(eta_variation_hours, method ="stl", frequency ="week") %>%# Changed trend = "periodic" to frequency = "week"anomalize(remainder, method ="iqr", alpha =0.05) %>%time_recompose() %>%plot_anomalies(time_recomposed =TRUE) +labs(title ="ETA Variation Anomalies") +theme_minimal()
Converting from tbl_ts to tbl_time.
Auto-index message: index = date
frequency = 7 days
trend = 91 days
Decomposed costs into transport infastrcuture and risk
delivery_recipe <-recipe(delivery_time_deviation ~ ., data = train_data) %>%update_role(date, new_role ="ID") %>%# Keep date as IDstep_rm(order_fulfillment_status, cargo_condition_status, handling_equipment_availability) %>%step_integer(all_nominal_predictors()) %>%step_impute_mean(all_numeric_predictors()) %>%step_normalize(all_numeric_predictors())# Random Forest model with fixed parametersrf_model <-rand_forest(mtry =5, # number of predictors sampled at each splittrees =1000, # number of treesmin_n =5# minimum node size ) %>%set_engine("ranger", importance ="permutation") %>%set_mode("regression")# Fit workflowrf_fit <-workflow() %>%add_recipe(delivery_recipe) %>%add_model(rf_model) %>%fit(data = train_data)# Predict on test datarf_predictions <-predict(rf_fit, new_data = test_data) %>%bind_cols(test_data %>%select(date, delivery_time_deviation))# Calculate metricsmae_rf <-mae(rf_predictions, truth = delivery_time_deviation, estimate = .pred)rmse_rf <-rmse(rf_predictions, truth = delivery_time_deviation, estimate = .pred)rsq_rf <-rsq(rf_predictions, truth = delivery_time_deviation, estimate = .pred)print(mae_rf)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 mae standard 0.162
print(rmse_rf)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 0.265
print(rsq_rf)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rsq standard 0.943
# Predicted vs Actual scatter plotggplot(rf_predictions, aes(x = delivery_time_deviation, y = .pred)) +geom_point(alpha =0.6) +geom_abline(slope =1, intercept =0, color ="red", linetype ="dashed") +labs(title ="Predicted vs Actual Delivery Time Deviation (Random Forest)",subtitle =paste0("MAE: ", round(mae_rf$.estimate, 4), ", RMSE: ", round(rmse_rf$.estimate, 4)),x ="Actual Delivery Time Deviation",y ="Predicted Delivery Time Deviation" ) +theme_minimal() +coord_fixed()
# Actual vs Predicted over timeggplot(rf_predictions, aes(x = date)) +geom_line(aes(y = delivery_time_deviation, color ="Actual")) +geom_line(aes(y = .pred, color ="Predicted"), linetype ="dashed") +labs(title ="Actual vs Predicted Delivery Time Deviation Over Time",x ="Date",y ="Delivery Time Deviation",color ="Type" ) +theme_minimal()